In [1]:
import graphlab

In [2]:
# Load the training images from the 'image_train_data/' SFrame directory
# (path is relative to the notebook's working directory).
image_train = graphlab.SFrame('image_train_data/')


[INFO] This non-commercial license of GraphLab Create is assigned to satishkt@gmail.comand will expire on September 24, 2016. For commercial licensing options, visit https://dato.com/buy/.

[INFO] Start server at: ipc:///tmp/graphlab_server-27554 - Server binary: /Users/Satish/.graphlab/anaconda/lib/python2.7/site-packages/graphlab/unity_server - Server log: /tmp/graphlab_server_1446462980.log
[INFO] GraphLab Server Version: 1.6.1

In [3]:
# Preview the first four rows; the output below shows the columns
# id, image, label, deep_features and image_array.
image_train.head(4)


Out[3]:
id image label deep_features image_array
24 Height: 32 Width: 32 bird [0.242871761322,
1.09545373917, 0.0, ...
[73.0, 77.0, 58.0, 71.0,
68.0, 50.0, 77.0, 69.0, ...
33 Height: 32 Width: 32 cat [0.525087952614, 0.0,
0.0, 0.0, 0.0, 0.0, ...
[7.0, 5.0, 8.0, 7.0, 5.0,
8.0, 5.0, 4.0, 6.0, 7.0, ...
36 Height: 32 Width: 32 cat [0.566015958786, 0.0,
0.0, 0.0, 0.0, 0.0, ...
[169.0, 122.0, 65.0,
131.0, 108.0, 75.0, ...
70 Height: 32 Width: 32 dog [1.12979578972, 0.0, 0.0,
0.778194487095, 0.0, ...
[154.0, 179.0, 152.0,
159.0, 183.0, 157.0, ...
[4 rows x 5 columns]

Train a nearest-neighbours model for retrieving images using deep features


In [4]:
# Nearest-neighbour model over every training image. Only the
# 'deep_features' column is used for distances, and rows are labelled by
# 'id', so query results report training ids in `reference_label`.
knn_model = graphlab.nearest_neighbors.create(
    image_train,
    features=['deep_features'],
    label='id'
)


PROGRESS: Starting brute force nearest neighbors model training.

In [7]:
# Send Canvas visualisations to the web browser.
graphlab.canvas.set_target('browser')
# Keep training row 18 as a one-row SFrame (a slice, not a single index);
# it is passed to knn_model.query() in the following cells.
cat = image_train[18:19]
# Display the query image itself.
cat['image'].show()


Canvas is accessible via web browser at the URL: http://localhost:65007/index.html
Opening Canvas in default web browser.

In [8]:
# Look up the nearest training images to `cat`. The output below lists five
# matches (rank 1-5); the rank-1 match has distance 0.0.
knn_model.query(cat)


PROGRESS: Starting pairwise querying.
PROGRESS: +--------------+---------+-------------+--------------+
PROGRESS: | Query points | # Pairs | % Complete. | Elapsed Time |
PROGRESS: +--------------+---------+-------------+--------------+
PROGRESS: | 0            | 1       | 0.0498753   | 30.503ms     |
PROGRESS: | Done         |         | 100         | 239.163ms    |
PROGRESS: +--------------+---------+-------------+--------------+
Out[8]:
query_label reference_label distance rank
0 384 0.0 1
0 6910 36.9403137951 2
0 39777 38.4634888975 3
0 36870 39.7559623119 4
0 41734 39.7866014148 5
[5 rows x 4 columns]


In [9]:
def get_images_from_ids(query_result):
    """Return the rows of `image_train` referenced by a nearest-neighbour query.

    Args:
        query_result: result of a model `query(...)` call; its
            'reference_label' column is matched against the training 'id'
            column.

    Returns:
        The subset of `image_train` whose 'id' appears in
        `query_result['reference_label']`. Row order is whatever
        `filter_by` yields, not necessarily the query's rank order.
    """
    neighbor_ids = query_result['reference_label']
    return image_train.filter_by(neighbor_ids, 'id')

In [10]:
# Training rows for the ids returned by querying the model with `cat`.
cat_neighbors  = get_images_from_ids(knn_model.query(cat))


PROGRESS: Starting pairwise querying.
PROGRESS: +--------------+---------+-------------+--------------+
PROGRESS: | Query points | # Pairs | % Complete. | Elapsed Time |
PROGRESS: +--------------+---------+-------------+--------------+
PROGRESS: | 0            | 1       | 0.0498753   | 22.692ms     |
PROGRESS: | Done         |         | 100         | 231.923ms    |
PROGRESS: +--------------+---------+-------------+--------------+

In [11]:
# Display the neighbour images in Canvas.
cat_neighbors['image'].show()


Canvas is updated and available in a tab in the default browser.

In [12]:
# Training row 8 as a one-row SFrame, used as the next query image.
car  = image_train[8:9]

In [13]:
# Display the query image stored in `car`.
car['image'].show()


Canvas is updated and available in a tab in the default browser.

In [14]:
# Query the model with `car`, fetch the matching training rows and display
# their images in one chained expression.
get_images_from_ids(knn_model.query(car))['image'].show()


PROGRESS: Starting pairwise querying.
PROGRESS: +--------------+---------+-------------+--------------+
PROGRESS: | Query points | # Pairs | % Complete. | Elapsed Time |
PROGRESS: +--------------+---------+-------------+--------------+
PROGRESS: | 0            | 1       | 0.0498753   | 15.922ms     |
PROGRESS: | Done         |         | 100         | 231.222ms    |
PROGRESS: +--------------+---------+-------------+--------------+
Canvas is accessible via web browser at the URL: http://localhost:65007/index.html
Opening Canvas in default web browser.

In [15]:
def show_neighbors(i):
    """Display the nearest training-set neighbours of training row `i`.

    Args:
        i: integer row position in `image_train` of the query image.

    Returns:
        Whatever `SArray.show()` returns for the neighbours' 'image' column
        (the call is made for its Canvas side effect).
    """
    # Slice (rather than index) so the query input stays a one-row SFrame.
    query_row = image_train[i:i+1]
    neighbors = get_images_from_ids(knn_model.query(query_row))
    return neighbors['image'].show()

In [16]:
# Neighbours of training row 8 (the same row that was sliced into `car`).
show_neighbors(8)


PROGRESS: Starting pairwise querying.
PROGRESS: +--------------+---------+-------------+--------------+
PROGRESS: | Query points | # Pairs | % Complete. | Elapsed Time |
PROGRESS: +--------------+---------+-------------+--------------+
PROGRESS: | 0            | 1       | 0.0498753   | 21.475ms     |
PROGRESS: | Done         |         | 100         | 230.678ms    |
PROGRESS: +--------------+---------+-------------+--------------+
Canvas is accessible via web browser at the URL: http://localhost:65007/index.html
Opening Canvas in default web browser.

In [18]:
# Neighbours of training row 26.
show_neighbors(26)


PROGRESS: Starting pairwise querying.
PROGRESS: +--------------+---------+-------------+--------------+
PROGRESS: | Query points | # Pairs | % Complete. | Elapsed Time |
PROGRESS: +--------------+---------+-------------+--------------+
PROGRESS: | 0            | 1       | 0.0498753   | 19.328ms     |
PROGRESS: | Done         |         | 100         | 226.441ms    |
PROGRESS: +--------------+---------+-------------+--------------+
Canvas is accessible via web browser at the URL: http://localhost:65007/index.html
Opening Canvas in default web browser.

In [20]:
# Neighbours of training row 122.
show_neighbors(122)


PROGRESS: Starting pairwise querying.
PROGRESS: +--------------+---------+-------------+--------------+
PROGRESS: | Query points | # Pairs | % Complete. | Elapsed Time |
PROGRESS: +--------------+---------+-------------+--------------+
PROGRESS: | 0            | 1       | 0.0498753   | 24.188ms     |
PROGRESS: | Done         |         | 100         | 231.114ms    |
PROGRESS: +--------------+---------+-------------+--------------+
Canvas is accessible via web browser at the URL: http://localhost:65007/index.html
Opening Canvas in default web browser.

In [21]:
# Neighbours of training row 2000.
show_neighbors(2000)


PROGRESS: Starting pairwise querying.
PROGRESS: +--------------+---------+-------------+--------------+
PROGRESS: | Query points | # Pairs | % Complete. | Elapsed Time |
PROGRESS: +--------------+---------+-------------+--------------+
PROGRESS: | 0            | 1       | 0.0498753   | 28.093ms     |
PROGRESS: | Done         |         | 100         | 235.882ms    |
PROGRESS: +--------------+---------+-------------+--------------+
Canvas is accessible via web browser at the URL: http://localhost:65007/index.html
Opening Canvas in default web browser.

In [23]:
# Summary statistics for the label column; the output below reports 2005 rows
# and the per-label counts for automobile, cat, dog and bird.
image_train['label'].sketch_summary()


Out[23]:
+------------------+-------+----------+
|       item       | value | is exact |
+------------------+-------+----------+
|      Length      |  2005 |   Yes    |
| # Missing Values |   0   |   Yes    |
| # unique values  |   4   |    No    |
+------------------+-------+----------+

Most frequent items:
+-------+------------+-----+-----+------+
| value | automobile | cat | dog | bird |
+-------+------------+-----+-----+------+
| count |    509     | 509 | 509 | 478  |
+-------+------------+-----+-----+------+

In [24]:
# The four label values present in the training data.
# NOTE(review): no other visible cell references this list - confirm it is still needed.
image_categs = ['dog','cat','automobile','bird']

In [25]:
# Training rows whose 'label' column equals 'dog'.
dog_df = image_train.filter_by('dog','label')

In [28]:
# Remaining per-label training subsets, built the same way as `dog_df`.
cat_df = image_train.filter_by('cat','label')
auto_df = image_train.filter_by('automobile','label')
bird_df = image_train.filter_by('bird','label')

In [30]:
# One nearest-neighbour model per label. Each is trained only on that label's
# training rows, uses the 'deep_features' column, and labels rows by 'id'.
dog_model = graphlab.nearest_neighbors.create(
    dog_df,
    features=['deep_features'],
    label='id'
)
cat_model = graphlab.nearest_neighbors.create(
    cat_df,
    features=['deep_features'],
    label='id'
)
auto_model = graphlab.nearest_neighbors.create(
    auto_df,
    features=['deep_features'],
    label='id'
)
bird_model = graphlab.nearest_neighbors.create(
    bird_df,
    features=['deep_features'],
    label='id'
)


PROGRESS: Starting brute force nearest neighbors model training.
PROGRESS: Starting brute force nearest neighbors model training.
PROGRESS: Starting brute force nearest neighbors model training.
PROGRESS: Starting brute force nearest neighbors model training.

In [31]:
# Load the test images from the 'image_test_data/' SFrame directory
# (path is relative to the notebook's working directory).
image_test = graphlab.SFrame('image_test_data/')

In [35]:
# Display the image of the first test row (taken as a one-row slice); this
# row is the query image for the per-label model comparisons below.
image_test[0:1]['image'].show()


Out[35]:
id image label deep_features image_array
0 Height: 32 Width: 32 cat [1.13469004631, 0.0, 0.0,
0.0, 0.0366497635841, ...
[158.0, 112.0, 49.0,
159.0, 111.0, 47.0, ...
[1 rows x 5 columns]


In [62]:
# Nearest cat-only training images for the first test image, shown as the raw
# query table (reference ids, distances and ranks).
cat_model.query(image_test[0:1])
# Alternative kept for reference: view the matched images instead of the table.
#get_images_from_ids(cat_model.query(image_test[0:1]))['image'].show()


PROGRESS: Starting pairwise querying.
PROGRESS: +--------------+---------+-------------+--------------+
PROGRESS: | Query points | # Pairs | % Complete. | Elapsed Time |
PROGRESS: +--------------+---------+-------------+--------------+
PROGRESS: | 0            | 1       | 0.196464    | 23.597ms     |
PROGRESS: | Done         |         | 100         | 102.813ms    |
PROGRESS: +--------------+---------+-------------+--------------+
Out[62]:
query_label reference_label distance rank
0 16289 34.623719208 1
0 45646 36.0068799284 2
0 32139 36.5200813436 3
0 25713 36.7548502521 4
0 331 36.8731228168 5
[5 rows x 4 columns]


In [64]:
# Show the training image with id 16289 (the rank-1 cat_model match above).
# The row is selected with a boolean mask on the 'id' column; the previous
# `'id' == 16289` was a plain Python comparison that evaluates to False, so
# the frame was never filtered by id.
image_train[image_train['id'] == 16289]['image'].show()

In [43]:
# Same lookup against the model trained on all labels, returning the matching
# training rows rather than the distance table.
get_images_from_ids(knn_model.query(image_test[0:1]))


PROGRESS: Starting pairwise querying.
PROGRESS: +--------------+---------+-------------+--------------+
PROGRESS: | Query points | # Pairs | % Complete. | Elapsed Time |
PROGRESS: +--------------+---------+-------------+--------------+
PROGRESS: | 0            | 1       | 0.0498753   | 21.264ms     |
PROGRESS: | Done         |         | 100         | 210.747ms    |
PROGRESS: +--------------+---------+-------------+--------------+
Out[43]:
id image label deep_features image_array
331 Height: 32 Width: 32 cat [0.0, 0.0,
0.510963916779, 0.0, ...
[45.0, 65.0, 92.0, 72.0,
95.0, 110.0, 106.0, ...
16289 Height: 32 Width: 32 cat [0.964287519455, 0.0,
0.0, 0.0, 1.12515509129, ...
[215.0, 219.0, 231.0,
215.0, 219.0, 232.0, ...
25713 Height: 32 Width: 32 cat [0.536971271038, 0.0,
0.0, 0.0894458889961, ...
[228.0, 222.0, 236.0,
224.0, 213.0, 222.0, ...
32139 Height: 32 Width: 32 cat [1.29409468174, 0.0, 0.0,
0.513800263405, ...
[217.0, 220.0, 205.0,
221.0, 227.0, 218.0, ...
45646 Height: 32 Width: 32 cat [0.983677506447, 0.0,
0.0, 0.0, 0.0, ...
[51.0, 42.0, 26.0, 56.0,
47.0, 31.0, 59.0, 50.0, ...
[5 rows x 5 columns]


In [45]:
# Nearest dog-only training images for the same first test image.
dog_model.query(image_test[0:1])


PROGRESS: Starting pairwise querying.
PROGRESS: +--------------+---------+-------------+--------------+
PROGRESS: | Query points | # Pairs | % Complete. | Elapsed Time |
PROGRESS: +--------------+---------+-------------+--------------+
PROGRESS: | 0            | 1       | 0.196464    | 21.094ms     |
PROGRESS: | Done         |         | 100         | 97.835ms     |
PROGRESS: +--------------+---------+-------------+--------------+
Out[45]:
query_label reference_label distance rank
0 16976 37.4642628784 1
0 13387 37.5666832169 2
0 35867 37.6047267079 3
0 44603 37.7065585153 4
0 6094 38.5113254907 5
[5 rows x 4 columns]


In [72]:
# Display the dog-model matches for the first test image in Canvas.
get_images_from_ids(dog_model.query(image_test[0:1]))['image'].show()


PROGRESS: Starting pairwise querying.
PROGRESS: +--------------+---------+-------------+--------------+
PROGRESS: | Query points | # Pairs | % Complete. | Elapsed Time |
PROGRESS: +--------------+---------+-------------+--------------+
PROGRESS: | 0            | 1       | 0.196464    | 16.869ms     |
PROGRESS: | Done         |         | 100         | 86.298ms     |
PROGRESS: +--------------+---------+-------------+--------------+
Canvas is accessible via web browser at the URL: http://localhost:65007/index.html
Opening Canvas in default web browser.

In [67]:
# Show the training image with id 16976 (the rank-1 dog_model match above).
# The row is selected with a boolean mask on the 'id' column; the previous
# `'id' == 16976` was a plain Python comparison that evaluates to False, so
# the frame was never filtered by id.
image_train[image_train['id'] == 16976]['image'].show()

In [40]:
import graphlab.aggregate as agg
# Mean distance from the first test image to its cat_model matches. Every
# result row shares the same query_label, so the groupby collapses to one row.
cat_model.query(image_test[0:1]).groupby('query_label',operations = {'mean_distance' : agg.MEAN('distance')})


PROGRESS: Starting pairwise querying.
PROGRESS: +--------------+---------+-------------+--------------+
PROGRESS: | Query points | # Pairs | % Complete. | Elapsed Time |
PROGRESS: +--------------+---------+-------------+--------------+
PROGRESS: | 0            | 1       | 0.196464    | 21.668ms     |
PROGRESS: | Done         |         | 100         | 93.05ms      |
PROGRESS: +--------------+---------+-------------+--------------+
Out[40]:
query_label mean_distance
0 36.1557307098
[1 rows x 2 columns]


In [46]:
# Mean distance from the first test image to its dog_model matches, for
# comparison with the cat_model figure. Relies on `agg` having been imported
# by an earlier cell.
dog_model.query(image_test[0:1]).groupby('query_label',operations = {'mean_distance' : agg.MEAN('distance')})


PROGRESS: Starting pairwise querying.
PROGRESS: +--------------+---------+-------------+--------------+
PROGRESS: | Query points | # Pairs | % Complete. | Elapsed Time |
PROGRESS: +--------------+---------+-------------+--------------+
PROGRESS: | 0            | 1       | 0.196464    | 19.939ms     |
PROGRESS: | Done         |         | 100         | 96.441ms     |
PROGRESS: +--------------+---------+-------------+--------------+
Out[46]:
query_label mean_distance
0 37.7707113618
[1 rows x 2 columns]


In [47]:
# Split the test set by label so each class can be queried separately.
image_test_cat = image_test.filter_by('cat','label')
image_test_dog = image_test.filter_by('dog','label')
image_test_automobile = image_test.filter_by('automobile','label')
image_test_bird = image_test.filter_by('bird','label')

In [52]:
# Single nearest neighbour (k=1) in the dog-only model for every dog test image.
dog_dog_neighbors = dog_model.query(image_test_dog, k=1)


PROGRESS: Starting blockwise querying.
PROGRESS: max rows per data block: 7668
PROGRESS: number of reference data blocks: 4
PROGRESS: number of query data blocks: 1
PROGRESS: +--------------+---------+-------------+--------------+
PROGRESS: | Query points | # Pairs | % Complete. | Elapsed Time |
PROGRESS: +--------------+---------+-------------+--------------+
PROGRESS: | 1000         | 127000  | 24.9509     | 299.768ms    |
PROGRESS: | Done         | 509000  | 100         | 340.176ms    |
PROGRESS: +--------------+---------+-------------+--------------+

In [48]:
# Single nearest neighbour (k=1) in the cat-only model for every dog test image.
dog_cat_neighbors = cat_model.query(image_test_dog, k=1)


PROGRESS: Starting blockwise querying.
PROGRESS: max rows per data block: 7668
PROGRESS: number of reference data blocks: 4
PROGRESS: number of query data blocks: 1
PROGRESS: +--------------+---------+-------------+--------------+
PROGRESS: | Query points | # Pairs | % Complete. | Elapsed Time |
PROGRESS: +--------------+---------+-------------+--------------+
PROGRESS: | 1000         | 127000  | 24.9509     | 328.035ms    |
PROGRESS: | Done         | 509000  | 100         | 355.439ms    |
PROGRESS: +--------------+---------+-------------+--------------+

In [49]:
# Single nearest neighbour (k=1) in the automobile-only model for every dog test image.
dog_auto_neighbors = auto_model.query(image_test_dog,k=1)


PROGRESS: Starting blockwise querying.
PROGRESS: max rows per data block: 7668
PROGRESS: number of reference data blocks: 4
PROGRESS: number of query data blocks: 1
PROGRESS: +--------------+---------+-------------+--------------+
PROGRESS: | Query points | # Pairs | % Complete. | Elapsed Time |
PROGRESS: +--------------+---------+-------------+--------------+
PROGRESS: | 1000         | 128000  | 25.1473     | 319.205ms    |
PROGRESS: | Done         | 509000  | 100         | 378.166ms    |
PROGRESS: +--------------+---------+-------------+--------------+

In [50]:
# Single nearest neighbour (k=1) in the bird-only model for every dog test image.
# The variable is spelled `dog_brid_neighbors` (sic); later cells reference
# this exact name, so it must be renamed everywhere at once if corrected.
dog_brid_neighbors =  bird_model.query(image_test_dog,k=1)


PROGRESS: Starting blockwise querying.
PROGRESS: max rows per data block: 7668
PROGRESS: number of reference data blocks: 4
PROGRESS: number of query data blocks: 1
PROGRESS: +--------------+---------+-------------+--------------+
PROGRESS: | Query points | # Pairs | % Complete. | Elapsed Time |
PROGRESS: +--------------+---------+-------------+--------------+
PROGRESS: | 1000         | 120000  | 25.1046     | 303.156ms    |
PROGRESS: | Done         | 478000  | 100         | 343.45ms     |
PROGRESS: +--------------+---------+-------------+--------------+

In [ ]:


In [55]:
# Gather the k=1 distances side by side: one row per dog test image, one
# column per per-label model. The column names (including the underscore in
# 'dog_dog' versus hyphens elsewhere) are read by is_dog_correct.
dog_distances = graphlab.SFrame({
    'dog_dog': dog_dog_neighbors['distance'],
    'dog-cat': dog_cat_neighbors['distance'],
    'dog-automobile': dog_auto_neighbors['distance'],
    'dog-bird': dog_brid_neighbors['distance']
})

In [56]:
# Display the head of the distance table (the output reports 1000 rows x 4 columns).
dog_distances


Out[56]:
dog-automobile dog-bird dog-cat dog_dog
41.9579761457 41.7538647304 36.4196077068 33.4773590373
46.0021331807 41.3382958925 38.8353268874 32.8458495684
42.9462290692 38.6157590853 36.9763410854 35.0397073189
41.6866060048 37.0892269954 34.5750072914 33.9010327697
39.2269664935 38.272288694 34.778824791 37.4849250909
40.5845117698 39.1462089236 35.1171578292 34.945165344
45.1067352961 40.523040106 40.6095830913 39.0957278345
41.3221140974 38.1947918393 39.9036867306 37.7696131032
41.8244654995 40.1567131661 38.0674700168 35.1089144603
45.4976929401 45.5597962603 42.7258732951 43.2422832585
[1000 rows x 4 columns]
Note: Only the head of the SFrame is printed.
You can use print_rows(num_rows=m, num_columns=n) to print more rows and columns.


In [57]:
def is_dog_correct(row):
    """Return True when the dog model's distance is strictly the smallest.

    Args:
        row: mapping with the keys 'dog_dog', 'dog-cat', 'dog-bird' and
            'dog-automobile', each holding a distance.

    Returns:
        True if row['dog_dog'] is strictly less than each of the other three
        distances, otherwise False (ties count as incorrect).
    """
    dog_distance = row['dog_dog']
    # Compared in the order cat, bird, automobile; stops at the first failure.
    return all(dog_distance < row[other]
               for other in ('dog-cat', 'dog-bird', 'dog-automobile'))

In [59]:
# Number of dog test images whose dog-model neighbour is strictly closer than
# the cat, bird and automobile neighbours (678 of the 1000 rows in the output below).
dog_distances.apply(is_dog_correct).sum()


Out[59]:
678

In [ ]:
# Cat counterpart of the dog evaluation: for every *cat* test image, find its
# single nearest neighbour (k=1) in each per-label model. The dog_* results
# and is_dog_correct are already defined by earlier cells, so they are not
# recomputed here.
cat_cat_neighbors = cat_model.query(image_test_cat, k=1)
cat_dog_neighbors = dog_model.query(image_test_cat, k=1)
cat_auto_neighbors = auto_model.query(image_test_cat, k=1)
cat_bird_neighbors = bird_model.query(image_test_cat, k=1)

# One row per cat test image, one column per model. As in the dog comparison,
# this assumes every k=1 result lists the query images in the same order -
# TODO confirm.
cat_distances = graphlab.SFrame({
    'cat-cat': cat_cat_neighbors['distance'],
    'cat-dog': cat_dog_neighbors['distance'],
    'cat-automobile': cat_auto_neighbors['distance'],
    'cat-bird': cat_bird_neighbors['distance']
})


def is_cat_correct(row):
    """Return True when the cat model's distance is strictly the smallest.

    Args:
        row: mapping with the keys 'cat-cat', 'cat-dog', 'cat-bird' and
            'cat-automobile', each holding a distance.

    Returns:
        True if row['cat-cat'] is strictly less than each of the other three
        distances, otherwise False (ties count as incorrect).
    """
    return (row['cat-cat'] < row['cat-dog']) and (row['cat-cat'] < row['cat-bird']) and (row['cat-cat'] < row['cat-automobile'])

In [ ]:
# Count the rows of `cat_distances` for which `is_cat_correct` returns True.
# Both names must have been defined by an earlier cell before this one runs.
cat_distances.apply(is_cat_correct).sum()